
package com.kding.spider;

import android.os.Handler;
import android.os.Looper;
import android.util.Log;

import java.lang.ref.WeakReference;
import java.util.HashMap;
import java.util.regex.Pattern;

import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

/**
 * Created by lishaojie on 2019/9/12
 */

/**
 * Fluent builder that fetches a page with Jsoup on a background thread, derives a host
 * string from the page text, and reports the outcome on the main looper.
 *
 * <p>Configure the request through the chained setters, register a callback with
 * {@link #setCallBack(SpiderCallback)} (weakly referenced) or
 * {@link #setSpiderCallBack(SpiderCallback)} (strongly referenced), then start the request
 * with {@link #get()} or {@link #post()}.
 */
public class SpiderBuilder {

    /** Tag used for log output. */
    private static final String TAG = "SpiderBuilder";

    /** Loose URL matcher (http, https, ftp or file scheme) used to sanity-check the host. */
    private static final Pattern HOST_PATTERN = Pattern.compile("(https?|ftp|file)://" +
            "[-A-Za-z0-9+&@#/%?=~_|!:,.;]+[-A-Za-z0-9+&@#/%=~_|]", Pattern.CASE_INSENSITIVE);

    /** Posts callback invocations onto the main looper. */
    private final Handler spiderHandler = new Handler(Looper.getMainLooper());
    /** Callback held weakly; takes precedence over {@link #mSpiderCallback} while still alive. */
    private WeakReference<SpiderCallback> spiderCallback;
    /** Callback held strongly; used when no live weak callback is available. */
    private SpiderCallback mSpiderCallback;
    /** Request data passed to the Jsoup connection, created lazily. */
    private HashMap<String, String> params;
    /** Request headers, created lazily. */
    private HashMap<String, String> headers;
    /** Address to fetch. */
    private String url = "";
    /**
     * Separator used to cut the host payload out of the page text.
     * NOTE(review): this value is handed to {@link String#split(String)}, which interprets
     * it as a regular expression; it looks like it must be set before calling
     * {@link #get()} or {@link #post()} - confirm with callers.
     */
    private String appKey = "";
    /** Timeout handed to the Jsoup connection. */
    private int connectionTimeout = 3000;
    /**
     * User-Agent header value.
     * NOTE(review): the default starts with an "ie7" label followed by a full-width colon,
     * which looks like a copy/paste artifact; left untouched because servers may react to
     * a changed value - confirm before cleaning up.
     */
    private String userAgent = "ie7：mozilla/4.0 (compatible; msie 7.0b; windows nt 6.0)";
    /** Request cookies, created lazily. */
    private HashMap<String, String> cookie;
    /**
     * Whether "https://" (instead of "http://") is prepended to a host without a scheme.
     */
    private boolean isHttps;


    /**
     * Adds one request data entry.
     *
     * @param key   entry name
     * @param value entry value
     * @return this builder
     */
    public final SpiderBuilder params(String key, String value) {
        if (params == null) {
            params = new HashMap<>(4);
        }
        params.put(key, value);
        return this;
    }

    /**
     * Adds one request header.
     *
     * @param key   header name
     * @param value header value
     * @return this builder
     */
    public final SpiderBuilder headers(String key, String value) {
        if (headers == null) {
            headers = new HashMap<>(4);
        }
        headers.put(key, value);
        return this;
    }

    /**
     * Sets the address to crawl.
     *
     * @param url address passed to {@code Jsoup.connect}
     * @return this builder
     */
    public final SpiderBuilder connection(String url) {
        this.url = url;
        return this;
    }

    /**
     * Sets the User-Agent (spoofed browser information) sent with the request.
     *
     * @param userAgent User-Agent header value
     * @return this builder
     */
    public final SpiderBuilder userAgent(String userAgent) {
        this.userAgent = userAgent;
        return this;
    }

    /**
     * Adds one cookie.
     *
     * @param key   cookie name
     * @param value cookie value
     * @return this builder
     */
    public final SpiderBuilder cookie(String key, String value) {
        if (cookie == null) {
            cookie = new HashMap<>(4);
        }
        cookie.put(key, value);
        return this;
    }

    /**
     * Sets the connection timeout.
     *
     * @param timeOut value passed to the Jsoup connection's {@code timeout}
     * @return this builder
     */
    public final SpiderBuilder connectionTimeout(int timeOut) {
        this.connectionTimeout = timeOut;
        return this;
    }

    /**
     * Chooses the scheme prepended to a host that carries none.
     *
     * @param isHttps {@code true} for "https://", {@code false} for "http://"
     * @return this builder
     */
    public final SpiderBuilder isHttps(boolean isHttps) {
        this.isHttps = isHttps;
        return this;
    }

    /**
     * Sets the separator used to locate the host payload inside the page text.
     *
     * @param s separator; interpreted as a regular expression by {@link String#split(String)}
     * @return this builder
     */
    public final SpiderBuilder appKey(String s) {
        appKey = s;
        return this;
    }

    /**
     * Registers a callback that is only weakly referenced by this builder, so it is
     * skipped once it has been garbage collected.
     *
     * @param callback receiver of the crawl result
     * @return this builder
     */
    public final SpiderBuilder setCallBack(SpiderCallback callback) {
        spiderCallback = new WeakReference<>(callback);
        return this;
    }

    /**
     * Registers a callback that is strongly referenced by this builder. It is used only
     * when no live weak callback (see {@link #setCallBack(SpiderCallback)}) exists.
     *
     * @param callback receiver of the crawl result
     * @return this builder
     */
    public final SpiderBuilder setSpiderCallBack(SpiderCallback callback) {
        mSpiderCallback = callback;
        return this;
    }


    /** Builds the Jsoup connection from the configured url, timeout, headers, data, cookies and user agent. */
    private Connection ready() {
        Connection connection = Jsoup.connect(url)
                .timeout(connectionTimeout);
        if (headers != null) {
            connection.headers(headers);
        }
        if (params != null) {
            connection.data(params);
        }
        if (cookie != null) {
            connection.cookies(cookie);
        }
        return connection.userAgent(userAgent);
    }

    /**
     * Takes the second piece of {@code origin} split by {@link #appKey}, decodes it with
     * {@code Base64Utils.decodeHost} and makes sure the result carries an http(s) scheme.
     *
     * @param origin text extracted from the fetched page
     * @return the decoded host, prefixed with a scheme when it had none
     * @throws IllegalStateException if splitting by the separator yields fewer than two pieces
     */
    private String handleHost(String origin) {
        String[] parts = origin.split(appKey);
        if (parts.length < 2) {
            // Previously surfaced as a bare ArrayIndexOutOfBoundsException.
            throw new IllegalStateException("appKey separator not found in page content");
        }
        String result = Base64Utils.decodeHost(parts[1]);
        if (result.startsWith("https://") || result.startsWith("http://")) {
            return result;
        }
        return (isHttps ? "https://" : "http://") + result;
    }

    /**
     * Extracts the host from the fetched document and delivers it to the callback on the
     * main looper; any failure is routed to {@link #handleError(Exception)}.
     */
    private void handleResult(final Document doc) {
        try {
            final String result = handleHost(doc.getElementsByClass("file_content markdown-body").text());
            if (!checkResult(result)) {
                throw new NullPointerException("host is null");
            }
            spiderHandler.post(new Runnable() {
                @Override
                public void run() {
                    SpiderCallback callback = resolveCallback();
                    if (callback != null) {
                        callback.onSuccess(result, new SpiderDoc(doc));
                    }
                }
            });
        } catch (final Exception e) {
            handleError(e);
        }
    }

    /** Logs the failure and forwards it to the callback on the main looper. */
    private void handleError(final Exception e) {
        spiderHandler.post(new Runnable() {
            @Override
            public void run() {
                // getMessage() may be null; concatenation keeps the log message non-null,
                // and passing the throwable lets Log include it in the output.
                Log.e(TAG, "Spider request failed: " + e.getMessage(), e);
                SpiderCallback callback = resolveCallback();
                if (callback != null) {
                    callback.onError(e);
                }
            }
        });
    }

    /**
     * Picks the callback to notify: the weakly held one while it is still alive, otherwise
     * the strongly held one. The weak referent is read exactly once so it cannot be
     * cleared between the null check and the call.
     *
     * @return the callback to notify, or {@code null} when none is available
     */
    private SpiderCallback resolveCallback() {
        SpiderCallback weak = spiderCallback == null ? null : spiderCallback.get();
        return weak != null ? weak : mSpiderCallback;
    }

    /** Returns whether {@code url} contains something matching {@link #HOST_PATTERN}. */
    private boolean checkResult(String url) {
        return HOST_PATTERN.matcher(url).find();
    }

    /**
     * Starts the request with HTTP GET on a new thread; the result is reported through the
     * registered callback.
     */
    public void get() {
        execute(false);
    }

    /**
     * Starts the request with HTTP POST on a new thread; the result is reported through the
     * registered callback.
     */
    public void post() {
        execute(true);
    }

    /** Runs the request on a new thread, using POST when {@code usePost} is true, GET otherwise. */
    private void execute(final boolean usePost) {
        new Thread(new Runnable() {
            @Override
            public void run() {
                try {
                    Connection connection = ready();
                    Document doc = usePost ? connection.post() : connection.get();
                    handleResult(doc);
                } catch (Exception e) {
                    handleError(e);
                }
            }
        }).start();
    }

    /** Receiver of the crawl outcome; both methods are invoked via the main-looper handler. */
    public interface SpiderCallback {
        /**
         * Called when a host was extracted and passed validation.
         *
         * @param host the decoded host, including its scheme
         * @param doc  wrapper around the fetched document
         */
        void onSuccess(String host, SpiderDoc doc);

        /**
         * Called when fetching, decoding or validation failed.
         *
         * @param e the failure
         */
        void onError(Exception e);

    }
}
